# -*- coding:utf-8 -*-

import requests
from lxml import etree
from fake_useragent import UserAgent
from http import cookiejar
import json
from datetime import datetime
import time
import re
import csv
from pymongo import MongoClient
from utils.HttpUtils import HttpUtils


requests.packages.urllib3.disable_warnings()  # Suppress HTTPS security warnings


"""
安居客-二手房-经纪人 联系方式
https://wuhu.anjuke.com/tycoon/p2/
"""


class AnJuKe():
    """Scraper for broker listings on wuhu.anjuke.com.

    For every broker card found on a listing page, the broker's mobile detail
    page is requested, store and phone fields are extracted, and one document
    per broker is inserted into a MongoDB collection.
    """

    def __init__(self, db=None):
        """Prepare the cookie jar and the request headers.

        Args:
            db: Optional database handle supporting ``db[collection_name]``
                (e.g. a pymongo ``Database``).  When omitted, ``insertItem``
                falls back to the module-level ``db`` global, which is how
                the script entry point supplies it.
        """
        self.db = db
        # CookieJar instance meant to hold cookies; nothing in this class
        # reads it.
        self.cookie = cookiejar.CookieJar()
        # NOTE(review): 'authority'/'method'/'path'/'scheme' look like HTTP/2
        # pseudo-header names pasted from browser dev tools, and 'path' is
        # fixed to page 2 whatever page is requested -- confirm they are
        # still wanted.
        # No 'cookie' header is sent.
        self.headers = {
            'authority': 'wuhu.anjuke.com',
            'method': 'GET',
            'path': '/tycoon/p2/',
            'scheme': 'https',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'dnt': '1',
            'referer': 'https://wuhu.anjuke.com/tycoon/?from=esf_list_navigation',
            'sec-fetch-dest': 'document',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-user': '?1',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36',
        }

    @staticmethod
    def _mobile_detail_url(href):
        """Build the mobile detail-page URL from a desktop broker link.

        The "gongsi-" marker is stripped and the fourth "/"-separated piece
        of the link is appended to the mobile base URL.

        Returns:
            The mobile URL, or None when the link has no such piece.
        """
        parts = href.replace("gongsi-", "").split("/")
        if len(parts) < 4 or not parts[3]:
            return None
        # NOTE(review): the mobile path uses "wuh" while the desktop host is
        # "wuhu" -- confirm this is the intended city code.
        return "https://m.anjuke.com/wuh/" + parts[3]

    def get_contents(self, cursor):
        """Scrape one broker listing page and store every broker found on it.

        Args:
            cursor: Page number substituted into the listing URL.

        Returns:
            1 when at least one broker card was found on the page, 0 when
            none was found (reported on stdout as a captcha page).
        """
        url = f"https://wuhu.anjuke.com/tycoon/p{cursor}/"
        html = HttpUtils.do_request("GET", url, self.headers, "")
        root = etree.HTML(html.text)
        brokers = root.xpath('//div[@class="jjr-info"]')
        if not brokers:
            print(f"============验证码{url}=============")
            return 0

        for broker in brokers:
            names = broker.xpath('.//h3/a/text()')
            hrefs = broker.xpath('.//h3/a/@href')
            url_detail_mobile = self._mobile_detail_url(hrefs[0]) if hrefs else None
            if not names or url_detail_mobile is None:
                # A card without a name or a usable link cannot be followed;
                # skip it instead of aborting the whole page.
                print(f"skip broker card without name/link on {url}")
                continue

            dict_data = {
                'name': names[0],
                'mendian': "".join(broker.xpath('.//p[@class="jjr-desc"]/a/text()')),
                'mendian_address': "".join(broker.xpath('.//p[@class="jjr-desc"]/span[2]/text()')),
            }
            # Fetch the phone number from the detail page and persist the record.
            self.get_tel(url_detail_mobile, dict_data)
            # Pause between detail-page requests.
            time.sleep(2)
        return 1

    def get_tel(self, url, dict_data):
        """Fetch a broker's mobile detail page, add store/phone fields, store it.

        Args:
            url: Mobile detail-page URL of the broker.
            dict_data: Broker record; gains the 'mendian_1' and 'tel' keys in
                place before being inserted and printed.
        """
        html = HttpUtils.do_request("GET", url, self.headers, "")
        root = etree.HTML(html.text)
        company_text = "".join(root.xpath('//p[@class="broker-header-company"]//text()'))
        dict_data['mendian_1'] = company_text.replace("\n", "").replace(" ", "").strip()
        dict_data['tel'] = "".join(root.xpath('//a[@id="brokerintro-concat-phone"]/@data-broker-phone'))
        self.insertItem("安居客_芜湖_经纪人", dict_data)
        print(str(dict_data))

    def insertItem(self, tableName, data):
        """Insert ``data`` as one document into the collection ``tableName``.

        Uses the handle given to the constructor, or the module-level ``db``
        global when none was given.
        """
        # Compare against None explicitly: pymongo Database objects do not
        # support truth-value testing.
        database = self.db if self.db is not None else db
        database[tableName].insert_one(data)


if __name__ == '__main__':
    # Script entry point: walk listing pages 1..34 and stop at the first page
    # for which get_contents returns 0.
    conn = MongoClient('127.0.0.1', 27017)
    try:
        # `db` must stay a module-level global: AnJuKe.insertItem reads it.
        db = conn["Test"]
        anjuke = AnJuKe()
        for i in range(1, 35):
            print(f"============第【{i}】页=============")
            result = anjuke.get_contents(i)
            if result == 0:
                break
    finally:
        # Release the MongoDB connection even if scraping raises.
        conn.close()
